Load libraries

library(splines)
library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(DBI)

Load data

# Uncomment the following code to download the database from GitHub if you
# do not want to download the file manually or run the download/explore
# notebook.

# fileURL <- "https://github.com/ccb-hms/Imputation/raw/main/nhanes.sqlite"
# if(!file.exists("nhanes.sqlite")){
#     res <- tryCatch(download.file(fileURL,
#                               destfile="./nhanes.sqlite",
#                               method="auto"),
#                 error=function(e) 1)
# }


# Open the local NHANES SQLite database. Fail early when the file is missing:
# dbConnect() would otherwise create a new, empty database file at this path
# and the query further down would fail with a "no such table" error instead.
if (!file.exists("nhanes.sqlite")) {
  stop("Database file 'nhanes.sqlite' not found in the working directory.",
       call. = FALSE)
}
nhanes_db <- dbConnect(RSQLite::SQLite(), "nhanes.sqlite")

# list all of the tables
dbListTables(nhanes_db)
## [1] "blood_cholesterol"     "body_measures"         "current_health_status"
## [4] "demo"                  "diabetes"              "diet_total"           
## [7] "medical_conditions"    "merged_table"          "var_decr"
# Columns to read from merged_table. The string ends with a space on purpose:
# it is what separates the last column name from the FROM keyword below.
cols <- "BMXWAIST , RIDAGEYR, BMXHT, BMXWT, BMXBMI, RIAGENDR, years, DR1TM161, WTDRD1, BMXLEG, BMXARML "

# Assemble the SELECT statement that is sent to the database.
data_sql <- paste0("SELECT ", cols, "FROM merged_table")

# NOTE(review): repeats the table listing already printed right after the
# connection was opened; it does not affect the query that follows.
dbListTables(nhanes_db)
## [1] "blood_cholesterol"     "body_measures"         "current_health_status"
## [4] "demo"                  "diabetes"              "diet_total"           
## [7] "medical_conditions"    "merged_table"          "var_decr"
# Run the query and keep only complete rows (na.omit drops every row that
# contains at least one NA in the selected columns).
data <- dbGetQuery(nhanes_db, data_sql) |>
  na.omit()

# The data is in memory now, so the connection can be released.
dbDisconnect(nhanes_db)


# Fix the RNG seed so the train/test split, and therefore every model summary
# and MSE produced from it, can be reproduced on a re-run.
set.seed(42)

# Draw 5000 training rows, then 3000 test rows from the rows not used for
# training, so the two sets never overlap.
train_ix <- sample(x = seq_len(nrow(data)), size = 5000)
test_ix <- sample(x = setdiff(seq_len(nrow(data)), train_ix), size = 3000)

train_data <- data[train_ix, ]
test_data <- data[test_ix, ]

Inverse normal transformation and helper functions

# Rank-based inverse normal transformation.
#
# Each value is replaced by the standard-normal quantile of its adjusted rank:
#   qnorm((rank - 3/8) / (n + 1/4)),  n = number of non-missing values.
# Ties get their average rank (the default behaviour of rank()).
#
# Args:
#   x: numeric vector.
#
# Returns:
#   Numeric vector of the same length as x; NA inputs stay NA.
invNorm <- function(x) {
  # Keep NA positions as NA instead of ranking them last, and count only the
  # observed values, so missing data cannot distort the quantiles.
  n_obs <- sum(!is.na(x))
  qnorm((rank(x, na.last = "keep") - 3 / 8) / (n_obs + 1 - 6 / 8))
}

# Mean squared error between observed and predicted values.
#
# Args:
#   y_true: numeric vector of observed values.
#   y_pred: numeric vector of predicted values.
#
# Returns:
#   The mean of the squared differences, rounded to 4 decimal places.
mean_square_error <- function(y_true, y_pred) {
  squared_err <- (y_true - y_pred)^2
  round(mean(squared_err), digits = 4)
}

# Draw the kernel density estimate of a numeric vector as a filled curve.
#
# Args:
#   data: numeric vector passed to density().
#   data_name: label used in the plot title ("<data_name> Density").
#   col: fill colour of the area under the curve; the outline is always blue.
plot_density <- function(data, data_name, col = 'red') {
  dens <- density(data)
  plot_title <- paste(data_name, "Density")
  plot(dens, main = plot_title)
  # Overlay the same curve as a polygon to get the filled area.
  polygon(dens, col = col, border = "blue")
}

Data exploration

# Scatter plots of BMXWAIST against BMXHT, RIDAGEYR and BMXWT, coloured by
# RIAGENDR. Built with ggplot() + geom_point() because qplot() is deprecated
# in current ggplot2; alpha is set as a constant to reduce overplotting.
ggplot(data, aes(x = BMXHT, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

ggplot(data, aes(x = RIDAGEYR, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

ggplot(data, aes(x = BMXWT, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

Regression models

# Predictor-only copy of the test set: the response column BMXWAIST is removed
# so it cannot be used when predicting.
test_df <- select(test_data, -BMXWAIST)



# Fit a linear model on the training set, predict on the test set, and plot
# the predictions over the observed values.
#
# Args:
#   formula_str: model formula as a string, e.g. "BMXWAIST ~ BMXWT".
#   train_data_set: data frame used to fit the model.
#   test_data_set: data frame used for prediction. It must contain the
#     predictors as well as BMXWT and RIAGENDR (used for plotting). If it has
#     no BMXWAIST column (as the default test_df), the observed values are
#     taken from the global test_data, which must be row-aligned with it.
#
# Returns:
#   A ggplot object: observed BMXWAIST (black) and fitted values (coloured by
#   sex) against weight, titled with the test MSE.
#
# Side effects:
#   Prints the model summary.
run_model <- function(formula_str, train_data_set = train_data,
                      test_data_set = test_df) {
  # setup regression model
  lm_reg <- lm(formula = as.formula(formula_str), data = train_data_set)
  print(summary(lm_reg))

  # run prediction on the test set that was passed in (the argument used to be
  # ignored in favour of the global test_df)
  lm_pred <- predict(lm_reg, newdata = test_data_set, se.fit = TRUE)

  # Observed response: prefer the supplied test set, fall back to the global
  # test_data when the response column was stripped from it.
  if ("BMXWAIST" %in% names(test_data_set)) {
    observed <- test_data_set$BMXWAIST
  } else {
    observed <- test_data$BMXWAIST
  }

  # save prediction results
  pred_df <- data.frame(
    fit = lm_pred$fit,
    weight = test_data_set$BMXWT,
    sex = test_data_set$RIAGENDR,
    label = observed
  )

  # compute MSE
  mse <- mean_square_error(pred_df$label, pred_df$fit)

  # plot results; alpha is a constant, so it is set outside aes() (inside
  # aes() it is treated as a mapped variable and adds a spurious legend)
  ggplot(pred_df, aes(x = weight, y = label)) +
    geom_point(colour = "black", alpha = 0.1) +
    geom_point(aes(y = fit, colour = sex), alpha = 0.1, size = 1.5) +
    ylab("waist circumference") +
    ggtitle(paste("MSE = ", mse))
}

Regression models: weight

# Baseline: BMXWAIST regressed linearly on weight (BMXWT) alone.
run_model("BMXWAIST ~ BMXWT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.9731  -4.9555  -0.3079   4.9367  29.6161 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 44.014485   0.416713   105.6   <2e-16 ***
## BMXWT        0.679405   0.004972   136.6   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.186 on 4998 degrees of freedom
## Multiple R-squared:  0.7888, Adjusted R-squared:  0.7888 
## F-statistic: 1.867e+04 on 1 and 4998 DF,  p-value: < 2.2e-16

# Single predictor BMXWT, entered through a B-spline basis (bs() defaults).
run_model("BMXWAIST ~ bs(BMXWT)")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.8911  -4.9106  -0.2857   4.9437  29.0958 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  63.6842     0.8579   74.23   <2e-16 ***
## bs(BMXWT)1   41.7630     2.4755   16.87   <2e-16 ***
## bs(BMXWT)2   75.0338     1.9124   39.23   <2e-16 ***
## bs(BMXWT)3   99.9140     3.2283   30.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.154 on 4996 degrees of freedom
## Multiple R-squared:  0.7908, Adjusted R-squared:  0.7906 
## F-statistic:  6294 on 3 and 4996 DF,  p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Linear BMXWT plus a 7-df B-spline of RIDAGEYR, RIAGENDR and years.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.4842  -3.9345  -0.0767   3.9626  28.7827 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           37.757744   0.667719  56.547  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1  0.868468   0.981494   0.885  0.37628    
## bs(RIDAGEYR, df = 7)2  0.991158   0.676366   1.465  0.14287    
## bs(RIDAGEYR, df = 7)3  2.294245   0.766176   2.994  0.00276 ** 
## bs(RIDAGEYR, df = 7)4  4.669268   0.682401   6.842 8.73e-12 ***
## bs(RIDAGEYR, df = 7)5  9.482672   0.857544  11.058  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6  9.862797   0.937849  10.516  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 12.172864   1.098358  11.083  < 2e-16 ***
## BMXWT                  0.723016   0.004455 162.280  < 2e-16 ***
## RIAGENDRMale          -4.103855   0.179771 -22.828  < 2e-16 ***
## years2005-2006        -0.017005   0.370898  -0.046  0.96343    
## years2007-2008         0.064039   0.347084   0.185  0.85362    
## years2009-2010        -0.202011   0.345229  -0.585  0.55847    
## years2013-2014         0.313321   0.359356   0.872  0.38331    
## years2015-2016         1.091476   0.351646   3.104  0.00192 ** 
## years2017-2018         0.354006   0.365483   0.969  0.33279    
## years2022-2012        -0.257131   0.363763  -0.707  0.47968    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.064 on 4983 degrees of freedom
## Multiple R-squared:  0.8501, Adjusted R-squared:  0.8496 
## F-statistic:  1766 on 16 and 4983 DF,  p-value: < 2.2e-16

# Same predictors, but RIDAGEYR enters through a natural-spline basis (ns)
# with 7 df instead of a B-spline.
run_model("BMXWAIST ~ ns(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.5761  -3.9386  -0.0617   3.9344  28.7022 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           37.815716   0.585583  64.578  < 2e-16 ***
## ns(RIDAGEYR, df = 7)1  1.266134   0.560684   2.258 0.023977 *  
## ns(RIDAGEYR, df = 7)2  2.621256   0.699723   3.746 0.000182 ***
## ns(RIDAGEYR, df = 7)3  4.773123   0.634407   7.524 6.28e-14 ***
## ns(RIDAGEYR, df = 7)4  6.761823   0.623801  10.840  < 2e-16 ***
## ns(RIDAGEYR, df = 7)5  9.518455   0.604527  15.745  < 2e-16 ***
## ns(RIDAGEYR, df = 7)6 11.743722   1.084512  10.829  < 2e-16 ***
## ns(RIDAGEYR, df = 7)7 10.489553   0.665906  15.752  < 2e-16 ***
## BMXWT                  0.723027   0.004455 162.300  < 2e-16 ***
## RIAGENDRMale          -4.107483   0.179788 -22.846  < 2e-16 ***
## years2005-2006        -0.014358   0.370921  -0.039 0.969123    
## years2007-2008         0.031140   0.346287   0.090 0.928351    
## years2009-2010        -0.242658   0.344236  -0.705 0.480895    
## years2013-2014         0.284803   0.358807   0.794 0.427379    
## years2015-2016         1.047251   0.350770   2.986 0.002844 ** 
## years2017-2018         0.326165   0.364711   0.894 0.371199    
## years2022-2012        -0.282681   0.363222  -0.778 0.436454    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.064 on 4983 degrees of freedom
## Multiple R-squared:  0.8501, Adjusted R-squared:  0.8496 
## F-statistic:  1766 on 16 and 4983 DF,  p-value: < 2.2e-16

# B-splines for RIDAGEYR (7 df) and BMXWT (defaults), plus RIAGENDR and a
# linear BMXHT term; years is not included.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT) + RIAGENDR + BMXHT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.6985  -3.2686   0.0629   3.2866  30.0961 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           126.51084    1.81254  69.798  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1   0.76657    0.83547   0.918  0.35891    
## bs(RIDAGEYR, df = 7)2   0.72922    0.57508   1.268  0.20485    
## bs(RIDAGEYR, df = 7)3   2.05288    0.65192   3.149  0.00165 ** 
## bs(RIDAGEYR, df = 7)4   3.58750    0.57951   6.191 6.48e-10 ***
## bs(RIDAGEYR, df = 7)5   7.62294    0.72950  10.450  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6   7.89324    0.79475   9.932  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7  10.23133    0.93061  10.994  < 2e-16 ***
## bs(BMXWT)1             55.10227    1.81280  30.396  < 2e-16 ***
## bs(BMXWT)2             88.47071    1.40174  63.115  < 2e-16 ***
## bs(BMXWT)3            115.44075    2.34801  49.165  < 2e-16 ***
## RIAGENDRMale            0.81086    0.19962   4.062 4.94e-05 ***
## BMXHT                  -0.45093    0.01077 -41.878  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.161 on 4987 degrees of freedom
## Multiple R-squared:  0.8913, Adjusted R-squared:  0.891 
## F-statistic:  3408 on 12 and 4987 DF,  p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# RIDAGEYR spline with linear BMXWT and BMXHT, plus RIAGENDR and years.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.3755  -3.3134   0.0594   3.2993  29.1634 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           103.443472   1.775803  58.252  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1   1.092503   0.858321   1.273 0.203135    
## bs(RIDAGEYR, df = 7)2   0.749962   0.591504   1.268 0.204896    
## bs(RIDAGEYR, df = 7)3   2.328956   0.670010   3.476 0.000513 ***
## bs(RIDAGEYR, df = 7)4   3.920739   0.597056   6.567 5.67e-11 ***
## bs(RIDAGEYR, df = 7)5   7.883671   0.751020  10.497  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6   8.415141   0.820967  10.250  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7   9.872650   0.962292  10.260  < 2e-16 ***
## BMXWT                   0.788170   0.004236 186.047  < 2e-16 ***
## RIAGENDRMale            1.041177   0.204865   5.082 3.87e-07 ***
## BMXHT                  -0.433459   0.011067 -39.167  < 2e-16 ***
## years2005-2006         -0.050623   0.324346  -0.156 0.875979    
## years2007-2008         -0.209820   0.303600  -0.691 0.489531    
## years2009-2010         -0.429730   0.301954  -1.423 0.154751    
## years2013-2014         -0.028985   0.314373  -0.092 0.926544    
## years2015-2016          0.244278   0.308269   0.792 0.428156    
## years2017-2018         -0.304234   0.320051  -0.951 0.341865    
## years2022-2012         -0.394561   0.318125  -1.240 0.214934    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.303 on 4982 degrees of freedom
## Multiple R-squared:  0.8854, Adjusted R-squared:  0.885 
## F-statistic:  2263 on 17 and 4982 DF,  p-value: < 2.2e-16

# BMXWT enters through invNorm(). invNorm() is rank-based, so at prediction
# time it is recomputed from the ranks within the test set rather than reusing
# the training-set mapping. NOTE(review): confirm this is intended.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + invNorm(BMXWT) + RIAGENDR + BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -21.3084  -3.5766  -0.0816   3.4916  30.7634 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           170.02722    1.99757  85.117  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1   0.34416    0.90701   0.379  0.70437    
## bs(RIDAGEYR, df = 7)2   0.62591    0.62513   1.001  0.31675    
## bs(RIDAGEYR, df = 7)3   1.85213    0.70813   2.616  0.00894 ** 
## bs(RIDAGEYR, df = 7)4   3.13323    0.63107   4.965 7.10e-07 ***
## bs(RIDAGEYR, df = 7)5   7.49316    0.79377   9.440  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6   6.97959    0.86756   8.045 1.07e-15 ***
## bs(RIDAGEYR, df = 7)7  10.74823    1.01711  10.567  < 2e-16 ***
## invNorm(BMXWT)         16.18351    0.09271 174.563  < 2e-16 ***
## RIAGENDRMale            0.30981    0.21636   1.432  0.15224    
## BMXHT                  -0.44434    0.01174 -37.863  < 2e-16 ***
## years2005-2006         -0.37844    0.34279  -1.104  0.26964    
## years2007-2008         -0.19696    0.32084  -0.614  0.53932    
## years2009-2010         -0.06324    0.31908  -0.198  0.84289    
## years2013-2014          0.33212    0.33221   1.000  0.31749    
## years2015-2016          0.57295    0.32571   1.759  0.07863 .  
## years2017-2018          0.13966    0.33812   0.413  0.67959    
## years2022-2012         -0.35054    0.33619  -1.043  0.29715    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.604 on 4982 degrees of freedom
## Multiple R-squared:  0.872,  Adjusted R-squared:  0.8715 
## F-statistic:  1996 on 17 and 4982 DF,  p-value: < 2.2e-16

# BMXHT enters through invNorm(). invNorm() is rank-based, so at prediction
# time it is recomputed from the ranks within the test set rather than reusing
# the training-set mapping. NOTE(review): confirm this is intended.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + invNorm(BMXHT) + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.5708  -3.3475   0.0416   3.3121  28.7053 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           31.158108   0.610040  51.076  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1  1.044600   0.860906   1.213 0.225045    
## bs(RIDAGEYR, df = 7)2  0.739225   0.593293   1.246 0.212835    
## bs(RIDAGEYR, df = 7)3  2.297568   0.672032   3.419 0.000634 ***
## bs(RIDAGEYR, df = 7)4  3.898765   0.598883   6.510 8.25e-11 ***
## bs(RIDAGEYR, df = 7)5  7.871112   0.753328  10.448  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6  8.424714   0.823451  10.231  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7  9.805344   0.965342  10.157  < 2e-16 ***
## BMXWT                  0.786882   0.004243 185.468  < 2e-16 ***
## RIAGENDRMale           0.835417   0.202937   4.117 3.91e-05 ***
## invNorm(BMXHT)        -4.324482   0.111848 -38.664  < 2e-16 ***
## years2005-2006        -0.049070   0.325325  -0.151 0.880113    
## years2007-2008        -0.201421   0.304514  -0.661 0.508354    
## years2009-2010        -0.431643   0.302867  -1.425 0.154165    
## years2013-2014        -0.028603   0.315324  -0.091 0.927728    
## years2015-2016         0.258252   0.309190   0.835 0.403615    
## years2017-2018        -0.276951   0.320990  -0.863 0.388287    
## years2022-2012        -0.398972   0.319087  -1.250 0.211229    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.319 on 4982 degrees of freedom
## Multiple R-squared:  0.8847, Adjusted R-squared:  0.8843 
## F-statistic:  2248 on 17 and 4982 DF,  p-value: < 2.2e-16

# B-splines for RIDAGEYR (7 df) and BMXWT (defaults), linear BMXHT, RIAGENDR
# and years.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT) + RIAGENDR + BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.5990  -3.2740   0.0656   3.2732  30.2603 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           126.28530    1.83903  68.670  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1   0.80575    0.83563   0.964  0.33497    
## bs(RIDAGEYR, df = 7)2   0.71904    0.57565   1.249  0.21170    
## bs(RIDAGEYR, df = 7)3   2.08042    0.65222   3.190  0.00143 ** 
## bs(RIDAGEYR, df = 7)4   3.60169    0.58128   6.196 6.25e-10 ***
## bs(RIDAGEYR, df = 7)5   7.64678    0.73095  10.461  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6   7.88009    0.79947   9.857  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7  10.28021    0.93670  10.975  < 2e-16 ***
## bs(BMXWT)1             55.21868    1.81434  30.435  < 2e-16 ***
## bs(BMXWT)2             88.36788    1.40271  62.998  < 2e-16 ***
## bs(BMXWT)3            115.51207    2.34967  49.161  < 2e-16 ***
## RIAGENDRMale            0.79282    0.19998   3.965 7.46e-05 ***
## BMXHT                  -0.44914    0.01081 -41.553  < 2e-16 ***
## years2005-2006         -0.19173    0.31571  -0.607  0.54367    
## years2007-2008         -0.20056    0.29545  -0.679  0.49729    
## years2009-2010         -0.30785    0.29390  -1.047  0.29495    
## years2013-2014          0.08846    0.30598   0.289  0.77252    
## years2015-2016          0.31072    0.30003   1.036  0.30044    
## years2017-2018         -0.17265    0.31153  -0.554  0.57947    
## years2022-2012         -0.40186    0.30955  -1.298  0.19428    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.16 on 4980 degrees of freedom
## Multiple R-squared:  0.8915, Adjusted R-squared:  0.8911 
## F-statistic:  2154 on 19 and 4980 DF,  p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# 7-df B-splines for all three continuous predictors (RIDAGEYR, BMXWT, BMXHT),
# plus RIAGENDR and years.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT,df=7) + RIAGENDR + bs(BMXHT,df=7) + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.8374  -3.2431   0.0935   3.2656  30.1903 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            65.2612     4.3252  15.089  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1   0.8267     0.8361   0.989 0.322810    
## bs(RIDAGEYR, df = 7)2   0.7281     0.5764   1.263 0.206507    
## bs(RIDAGEYR, df = 7)3   2.0879     0.6524   3.200 0.001381 ** 
## bs(RIDAGEYR, df = 7)4   3.6066     0.5817   6.200 6.11e-10 ***
## bs(RIDAGEYR, df = 7)5   7.7005     0.7318  10.522  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6   7.8766     0.8003   9.842  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7  10.2975     0.9383  10.974  < 2e-16 ***
## bs(BMXWT, df = 7)1     13.6283     3.7862   3.600 0.000322 ***
## bs(BMXWT, df = 7)2     26.3346     2.4649  10.684  < 2e-16 ***
## bs(BMXWT, df = 7)3     40.4432     2.7961  14.464  < 2e-16 ***
## bs(BMXWT, df = 7)4     50.3226     2.6827  18.758  < 2e-16 ***
## bs(BMXWT, df = 7)5     79.0649     2.9208  27.069  < 2e-16 ***
## bs(BMXWT, df = 7)6    103.9789     3.2701  31.797  < 2e-16 ***
## bs(BMXWT, df = 7)7    115.4465     3.9069  29.550  < 2e-16 ***
## RIAGENDRMale            0.8196     0.2071   3.958 7.66e-05 ***
## bs(BMXHT, df = 7)1     -6.6685     4.6565  -1.432 0.152178    
## bs(BMXHT, df = 7)2     -9.2048     3.1063  -2.963 0.003059 ** 
## bs(BMXHT, df = 7)3    -14.8818     3.4781  -4.279 1.92e-05 ***
## bs(BMXHT, df = 7)4    -17.1775     3.3611  -5.111 3.33e-07 ***
## bs(BMXHT, df = 7)5    -23.9234     3.5467  -6.745 1.70e-11 ***
## bs(BMXHT, df = 7)6    -29.5770     3.6498  -8.104 6.65e-16 ***
## bs(BMXHT, df = 7)7    -28.6051     4.3907  -6.515 7.99e-11 ***
## years2005-2006         -0.1834     0.3159  -0.580 0.561636    
## years2007-2008         -0.1952     0.2956  -0.660 0.509010    
## years2009-2010         -0.3012     0.2940  -1.024 0.305695    
## years2013-2014          0.0740     0.3064   0.242 0.809162    
## years2015-2016          0.3089     0.3003   1.029 0.303729    
## years2017-2018         -0.1768     0.3118  -0.567 0.570570    
## years2022-2012         -0.4078     0.3098  -1.316 0.188148    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.159 on 4970 degrees of freedom
## Multiple R-squared:  0.8918, Adjusted R-squared:  0.8911 
## F-statistic:  1412 on 29 and 4970 DF,  p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = c(`20%` = 64.1, `40%` = 73.7, : some
## 'x' values beyond boundary knots may cause ill-conditioned bases

# RIDAGEYR spline interacted with RIAGENDR (main effects and interaction via
# `*`), plus a BMXWT spline, linear BMXHT and years.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.421  -3.240   0.117   3.274  30.388 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        127.38026    1.87551  67.918  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1                1.61607    1.12738   1.433  0.15179    
## bs(RIDAGEYR, df = 7)2                0.07844    0.78498   0.100  0.92040    
## bs(RIDAGEYR, df = 7)3                1.53459    0.89048   1.723  0.08489 .  
## bs(RIDAGEYR, df = 7)4                2.14530    0.79534   2.697  0.00701 ** 
## bs(RIDAGEYR, df = 7)5                6.20633    1.00302   6.188 6.60e-10 ***
## bs(RIDAGEYR, df = 7)6                6.17625    1.10113   5.609 2.15e-08 ***
## bs(RIDAGEYR, df = 7)7                9.04269    1.28625   7.030 2.34e-12 ***
## RIAGENDRMale                        -1.05000    0.89340  -1.175  0.23994    
## bs(BMXWT)1                          55.15674    1.80515  30.555  < 2e-16 ***
## bs(BMXWT)2                          88.50642    1.39368  63.506  < 2e-16 ***
## bs(BMXWT)3                         115.28861    2.33313  49.414  < 2e-16 ***
## BMXHT                               -0.45072    0.01074 -41.966  < 2e-16 ***
## years2005-2006                      -0.21290    0.31349  -0.679  0.49708    
## years2007-2008                      -0.19869    0.29351  -0.677  0.49847    
## years2009-2010                      -0.30419    0.29194  -1.042  0.29749    
## years2013-2014                       0.13765    0.30399   0.453  0.65070    
## years2015-2016                       0.36156    0.29805   1.213  0.22515    
## years2017-2018                      -0.18806    0.30942  -0.608  0.54334    
## years2022-2012                      -0.31883    0.30754  -1.037  0.29992    
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale  -1.72731    1.66430  -1.038  0.29938    
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale   1.42978    1.14516   1.249  0.21189    
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale   1.24676    1.29715   0.961  0.33652    
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale   3.07599    1.15245   2.669  0.00763 ** 
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale   3.06589    1.44962   2.115  0.03448 *  
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale   3.47578    1.57629   2.205  0.02750 *  
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale   2.85646    1.84529   1.548  0.12169    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.121 on 4973 degrees of freedom
## Multiple R-squared:  0.8933, Adjusted R-squared:  0.8927 
## F-statistic:  1601 on 26 and 4973 DF,  p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# NOTE(review): the explicit "+ RIAGENDR" is redundant, because the `*`
# interaction already includes the RIAGENDR main effect. This fits the same
# model as the call without it (the printed summaries are identical).
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + RIAGENDR+ BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.421  -3.240   0.117   3.274  30.388 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        127.38026    1.87551  67.918  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1                1.61607    1.12738   1.433  0.15179    
## bs(RIDAGEYR, df = 7)2                0.07844    0.78498   0.100  0.92040    
## bs(RIDAGEYR, df = 7)3                1.53459    0.89048   1.723  0.08489 .  
## bs(RIDAGEYR, df = 7)4                2.14530    0.79534   2.697  0.00701 ** 
## bs(RIDAGEYR, df = 7)5                6.20633    1.00302   6.188 6.60e-10 ***
## bs(RIDAGEYR, df = 7)6                6.17625    1.10113   5.609 2.15e-08 ***
## bs(RIDAGEYR, df = 7)7                9.04269    1.28625   7.030 2.34e-12 ***
## RIAGENDRMale                        -1.05000    0.89340  -1.175  0.23994    
## bs(BMXWT)1                          55.15674    1.80515  30.555  < 2e-16 ***
## bs(BMXWT)2                          88.50642    1.39368  63.506  < 2e-16 ***
## bs(BMXWT)3                         115.28861    2.33313  49.414  < 2e-16 ***
## BMXHT                               -0.45072    0.01074 -41.966  < 2e-16 ***
## years2005-2006                      -0.21290    0.31349  -0.679  0.49708    
## years2007-2008                      -0.19869    0.29351  -0.677  0.49847    
## years2009-2010                      -0.30419    0.29194  -1.042  0.29749    
## years2013-2014                       0.13765    0.30399   0.453  0.65070    
## years2015-2016                       0.36156    0.29805   1.213  0.22515    
## years2017-2018                      -0.18806    0.30942  -0.608  0.54334    
## years2022-2012                      -0.31883    0.30754  -1.037  0.29992    
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale  -1.72731    1.66430  -1.038  0.29938    
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale   1.42978    1.14516   1.249  0.21189    
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale   1.24676    1.29715   0.961  0.33652    
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale   3.07599    1.15245   2.669  0.00763 ** 
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale   3.06589    1.44962   2.115  0.03448 *  
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale   3.47578    1.57629   2.205  0.02750 *  
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale   2.85646    1.84529   1.548  0.12169    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.121 on 4973 degrees of freedom
## Multiple R-squared:  0.8933, Adjusted R-squared:  0.8927 
## F-statistic:  1601 on 26 and 4973 DF,  p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# RIDAGEYR spline interacted with RIAGENDR, plus a BMXWT spline and linear
# BMXHT; years is not included.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + BMXHT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.5356  -3.2189   0.1307   3.2779  30.2036 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        127.6224     1.8521  68.908  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1                1.6055     1.1274   1.424  0.15449    
## bs(RIDAGEYR, df = 7)2                0.1002     0.7841   0.128  0.89830    
## bs(RIDAGEYR, df = 7)3                1.5198     0.8904   1.707  0.08792 .  
## bs(RIDAGEYR, df = 7)4                2.1726     0.7939   2.736  0.00623 ** 
## bs(RIDAGEYR, df = 7)5                6.1933     1.0023   6.179 6.96e-10 ***
## bs(RIDAGEYR, df = 7)6                6.2261     1.0965   5.678 1.44e-08 ***
## bs(RIDAGEYR, df = 7)7                8.9828     1.2824   7.005 2.81e-12 ***
## RIAGENDRMale                        -0.9844     0.8928  -1.103  0.27023    
## bs(BMXWT)1                          55.0165     1.8038  30.500  < 2e-16 ***
## bs(BMXWT)2                          88.6177     1.3929  63.622  < 2e-16 ***
## bs(BMXWT)3                         115.1933     2.3317  49.403  < 2e-16 ***
## BMXHT                               -0.4526     0.0107 -42.295  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale  -1.8019     1.6641  -1.083  0.27895    
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale   1.4186     1.1449   1.239  0.21539    
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale   1.2026     1.2966   0.927  0.35372    
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale   3.0045     1.1525   2.607  0.00916 ** 
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale   3.0226     1.4490   2.086  0.03704 *  
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale   3.4198     1.5763   2.170  0.03009 *  
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale   2.8243     1.8454   1.530  0.12596    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.123 on 4980 degrees of freedom
## Multiple R-squared:  0.8931, Adjusted R-squared:  0.8927 
## F-statistic:  2189 on 19 and 4980 DF,  p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# grid.arrange(g1, g2,g3, nrow=3)

Regression models with BMI

# BMXWAIST regressed linearly on BMI (BMXBMI) alone.
run_model("BMXWAIST ~ BMXBMI")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.262  -4.663   0.094   4.540  27.853 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 35.72136    0.45795      78   <2e-16 ***
## BMXBMI       2.19416    0.01545     142   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.969 on 4998 degrees of freedom
## Multiple R-squared:  0.8014, Adjusted R-squared:  0.8014 
## F-statistic: 2.017e+04 on 1 and 4998 DF,  p-value: < 2.2e-16

# Linear model on BMXWT and BMXHT, without BMXBMI, for comparison.
run_model("BMXWAIST ~ BMXWT + BMXHT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -30.6397  -3.9707  -0.0834   3.9635  27.4920 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 109.134705   1.446598   75.44   <2e-16 ***
## BMXWT         0.785554   0.004746  165.52   <2e-16 ***
## BMXHT        -0.440891   0.009506  -46.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.009 on 4997 degrees of freedom
## Multiple R-squared:  0.8524, Adjusted R-squared:  0.8523 
## F-statistic: 1.443e+04 on 2 and 4997 DF,  p-value: < 2.2e-16

# Linear model on BMXWT, BMXHT and BMXBMI together.
run_model("BMXWAIST ~ BMXWT + BMXHT + BMXBMI")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -30.1689  -3.9453  -0.0781   3.9420  26.8295 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 73.38133    6.54753  11.207  < 2e-16 ***
## BMXWT        0.56787    0.03917  14.497  < 2e-16 ***
## BMXHT       -0.22789    0.03921  -5.812 6.56e-09 ***
## BMXBMI       0.61556    0.10996   5.598 2.28e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.991 on 4996 degrees of freedom
## Multiple R-squared:  0.8533, Adjusted R-squared:  0.8532 
## F-statistic:  9687 on 3 and 4996 DF,  p-value: < 2.2e-16

# BMXBMI alone, entered through a 7-df B-spline basis.
run_model("BMXWAIST ~ bs(BMXBMI,df=7)")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -26.3967  -4.4326   0.0962   4.3794  30.6575 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           65.241      4.453  14.652  < 2e-16 ***
## bs(BMXBMI, df = 7)1    2.454      6.003   0.409 0.682710    
## bs(BMXBMI, df = 7)2   14.433      4.133   3.492 0.000483 ***
## bs(BMXBMI, df = 7)3   30.037      4.561   6.585 5.01e-11 ***
## bs(BMXBMI, df = 7)4   37.338      4.427   8.434  < 2e-16 ***
## bs(BMXBMI, df = 7)5   63.708      4.688  13.589  < 2e-16 ***
## bs(BMXBMI, df = 7)6   82.629      5.042  16.390  < 2e-16 ***
## bs(BMXBMI, df = 7)7   90.871      5.749  15.807  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.796 on 4992 degrees of freedom
## Multiple R-squared:  0.8113, Adjusted R-squared:  0.8111 
## F-statistic:  3067 on 7 and 4992 DF,  p-value: < 2.2e-16

# `*` expands to the BMXBMI spline, RIAGENDR, and their interaction, i.e. the
# spline coefficients are allowed to differ by RIAGENDR level (see the
# `...:RIAGENDRMale` rows in the output below).
run_model("BMXWAIST ~ bs(BMXBMI,df=7)*RIAGENDR")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.4443  -4.0031  -0.0805   3.9834  31.5606 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       63.7504     4.7206  13.505  < 2e-16 ***
## bs(BMXBMI, df = 7)1                2.6174     6.3436   0.413 0.679911    
## bs(BMXBMI, df = 7)2               16.1627     4.4240   3.653 0.000261 ***
## bs(BMXBMI, df = 7)3               28.0847     4.8487   5.792 7.37e-09 ***
## bs(BMXBMI, df = 7)4               36.0256     4.7012   7.663 2.17e-14 ***
## bs(BMXBMI, df = 7)5               61.0694     4.9964  12.223  < 2e-16 ***
## bs(BMXBMI, df = 7)6               78.6538     5.4181  14.517  < 2e-16 ***
## bs(BMXBMI, df = 7)7               94.7887     5.9872  15.832  < 2e-16 ***
## RIAGENDRMale                       6.8027     9.0898   0.748 0.454262    
## bs(BMXBMI, df = 7)1:RIAGENDRMale  -5.4408    12.3183  -0.442 0.658736    
## bs(BMXBMI, df = 7)2:RIAGENDRMale  -5.4968     8.3808  -0.656 0.511931    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  -0.9294     9.3070  -0.100 0.920459    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  -1.5407     9.0282  -0.171 0.864506    
## bs(BMXBMI, df = 7)5:RIAGENDRMale   3.4297     9.5507   0.359 0.719534    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  13.8396    10.1699   1.361 0.173624    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -18.3918    12.2089  -1.506 0.132020    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.141 on 4984 degrees of freedom
## Multiple R-squared:  0.8462, Adjusted R-squared:  0.8458 
## F-statistic:  1828 on 15 and 4984 DF,  p-value: < 2.2e-16

# Adds a 7-df B-spline in RIDAGEYR on top of the BMXBMI-by-RIAGENDR model;
# adjusted R-squared rises from 0.8458 to 0.8651 in the printed fits.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.3130  -3.6361   0.0063   3.5849  31.5368 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       55.39297    4.44137  12.472  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1              0.87715    0.93204   0.941  0.34670    
## bs(RIDAGEYR, df = 7)2              0.72428    0.64121   1.130  0.25872    
## bs(RIDAGEYR, df = 7)3              2.47406    0.72685   3.404  0.00067 ***
## bs(RIDAGEYR, df = 7)4              3.10688    0.64641   4.806 1.58e-06 ***
## bs(RIDAGEYR, df = 7)5              6.74146    0.81262   8.296  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6              6.36109    0.88538   7.185 7.74e-13 ***
## bs(RIDAGEYR, df = 7)7              8.53823    1.03397   8.258  < 2e-16 ***
## bs(BMXBMI, df = 7)1               10.07970    5.94613   1.695  0.09011 .  
## bs(BMXBMI, df = 7)2               21.37312    4.14741   5.153 2.66e-07 ***
## bs(BMXBMI, df = 7)3               32.90595    4.54424   7.241 5.13e-13 ***
## bs(BMXBMI, df = 7)4               40.81712    4.40679   9.262  < 2e-16 ***
## bs(BMXBMI, df = 7)5               66.73384    4.68411  14.247  < 2e-16 ***
## bs(BMXBMI, df = 7)6               83.75404    5.07768  16.495  < 2e-16 ***
## bs(BMXBMI, df = 7)7              102.11470    5.61013  18.202  < 2e-16 ***
## RIAGENDRMale                       9.78478    8.52047   1.148  0.25087    
## bs(BMXBMI, df = 7)1:RIAGENDRMale  -9.88677   11.54803  -0.856  0.39196    
## bs(BMXBMI, df = 7)2:RIAGENDRMale  -8.20193    7.85434  -1.044  0.29642    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  -4.10178    8.72403  -0.470  0.63825    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  -4.34299    8.46311  -0.513  0.60786    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  -0.03424    8.94972  -0.004  0.99695    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  12.45905    9.53162   1.307  0.19123    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -24.06824   11.43548  -2.105  0.03537 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.743 on 4977 degrees of freedom
## Multiple R-squared:  0.8657, Adjusted R-squared:  0.8651 
## F-statistic:  1458 on 22 and 4977 DF,  p-value: < 2.2e-16

# Same model, but bs(RIDAGEYR) is called with its defaults, which produces
# three basis columns here (bs(RIDAGEYR)1..3 in the output). Adjusted
# R-squared is unchanged at 0.8651 versus the df = 7 version above.
run_model("BMXWAIST ~ bs(RIDAGEYR) + bs(BMXBMI,df=7)*RIAGENDR")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.1655  -3.6332  -0.0267   3.5886  31.4559 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       55.5200     4.4293  12.535  < 2e-16 ***
## bs(RIDAGEYR)1                      0.9191     0.9001   1.021   0.3072    
## bs(RIDAGEYR)2                      5.0388     0.6750   7.465 9.76e-14 ***
## bs(RIDAGEYR)3                      7.6400     0.6017  12.698  < 2e-16 ***
## bs(BMXBMI, df = 7)1               10.3968     5.9419   1.750   0.0802 .  
## bs(BMXBMI, df = 7)2               21.5491     4.1450   5.199 2.09e-07 ***
## bs(BMXBMI, df = 7)3               33.1417     4.5410   7.298 3.37e-13 ***
## bs(BMXBMI, df = 7)4               41.0256     4.4040   9.315  < 2e-16 ***
## bs(BMXBMI, df = 7)5               66.9871     4.6807  14.311  < 2e-16 ***
## bs(BMXBMI, df = 7)6               83.9822     5.0748  16.549  < 2e-16 ***
## bs(BMXBMI, df = 7)7              102.2660     5.6078  18.236  < 2e-16 ***
## RIAGENDRMale                      10.2573     8.5133   1.205   0.2283    
## bs(BMXBMI, df = 7)1:RIAGENDRMale -10.5877    11.5377  -0.918   0.3588    
## bs(BMXBMI, df = 7)2:RIAGENDRMale  -8.5860     7.8486  -1.094   0.2740    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  -4.6051     8.7161  -0.528   0.5973    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  -4.7986     8.4561  -0.567   0.5704    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  -0.5467     8.9420  -0.061   0.9513    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  12.0413     9.5257   1.264   0.2063    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -24.5772    11.4264  -2.151   0.0315 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.742 on 4981 degrees of freedom
## Multiple R-squared:  0.8656, Adjusted R-squared:  0.8651 
## F-statistic:  1783 on 18 and 4981 DF,  p-value: < 2.2e-16

# Adds BMXHT as a single linear term to the age-spline + BMI-by-RIAGENDR model;
# adjusted R-squared goes from 0.8651 to 0.8904 in the printed fits.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXHT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.2939  -3.2046   0.0333   3.3150  26.7614 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        0.875590   4.314325   0.203 0.839183    
## bs(RIDAGEYR, df = 7)1              0.826852   0.840143   0.984 0.325075    
## bs(RIDAGEYR, df = 7)2              0.707601   0.577984   1.224 0.220913    
## bs(RIDAGEYR, df = 7)3              2.189631   0.655235   3.342 0.000839 ***
## bs(RIDAGEYR, df = 7)4              3.666038   0.582904   6.289 3.46e-10 ***
## bs(RIDAGEYR, df = 7)5              7.722513   0.733063  10.535  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6              7.888814   0.799351   9.869  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             10.795058   0.934394  11.553  < 2e-16 ***
## bs(BMXBMI, df = 7)1               10.202093   5.359830   1.903 0.057041 .  
## bs(BMXBMI, df = 7)2               20.427526   3.738574   5.464 4.88e-08 ***
## bs(BMXBMI, df = 7)3               33.195442   4.096175   8.104 6.63e-16 ***
## bs(BMXBMI, df = 7)4               41.028370   3.972275  10.329  < 2e-16 ***
## bs(BMXBMI, df = 7)5               65.950832   4.222311  15.620  < 2e-16 ***
## bs(BMXBMI, df = 7)6               84.272078   4.577035  18.412  < 2e-16 ***
## bs(BMXBMI, df = 7)7              100.886308   5.057092  19.949  < 2e-16 ***
## RIAGENDRMale                       7.454725   7.680646   0.971 0.331802    
## BMXHT                              0.337095   0.009943  33.903  < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.172793  10.409587  -1.169 0.242305    
## bs(BMXBMI, df = 7)2:RIAGENDRMale  -9.659010   7.080017  -1.364 0.172546    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  -6.575357   7.864158  -0.836 0.403129    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  -7.038814   7.629044  -0.923 0.356243    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  -1.216254   8.067339  -0.151 0.880169    
## bs(BMXBMI, df = 7)6:RIAGENDRMale   5.405456   8.594300   0.629 0.529405    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -16.081951  10.310608  -1.560 0.118883    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.176 on 4976 degrees of freedom
## Multiple R-squared:  0.8909, Adjusted R-squared:  0.8904 
## F-statistic:  1767 on 23 and 4976 DF,  p-value: < 2.2e-16

# BMXHT entered as a default B-spline instead of a linear term. The printed
# adjusted R-squared (0.8904) matches the linear-BMXHT fit above, so the extra
# flexibility does not improve the fit here.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + bs(BMXHT)")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.1739  -3.2061   0.0263   3.3132  26.9184 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       44.6088     4.1843  10.661  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1              0.8339     0.8402   0.992 0.321019    
## bs(RIDAGEYR, df = 7)2              0.6918     0.5782   1.196 0.231568    
## bs(RIDAGEYR, df = 7)3              2.1998     0.6552   3.357 0.000793 ***
## bs(RIDAGEYR, df = 7)4              3.6646     0.5830   6.285 3.55e-10 ***
## bs(RIDAGEYR, df = 7)5              7.7241     0.7332  10.535  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6              7.8780     0.7993   9.856  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             10.8106     0.9349  11.563  < 2e-16 ***
## bs(BMXBMI, df = 7)1               10.3177     5.3599   1.925 0.054289 .  
## bs(BMXBMI, df = 7)2               20.4662     3.7386   5.474 4.61e-08 ***
## bs(BMXBMI, df = 7)3               33.2882     4.0962   8.127 5.53e-16 ***
## bs(BMXBMI, df = 7)4               41.0916     3.9722  10.345  < 2e-16 ***
## bs(BMXBMI, df = 7)5               66.0708     4.2227  15.647  < 2e-16 ***
## bs(BMXBMI, df = 7)6               84.2406     4.5768  18.406  < 2e-16 ***
## bs(BMXBMI, df = 7)7              101.0498     5.0577  19.979  < 2e-16 ***
## RIAGENDRMale                       7.6526     7.6834   0.996 0.319301    
## bs(BMXHT)1                        12.1370     2.6968   4.501 6.93e-06 ***
## bs(BMXHT)2                        14.5437     1.1485  12.663  < 2e-16 ***
## bs(BMXHT)3                        26.7484     2.2894  11.684  < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.3965    10.4125  -1.191 0.233889    
## bs(BMXBMI, df = 7)2:RIAGENDRMale  -9.7275     7.0820  -1.374 0.169644    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  -6.7228     7.8659  -0.855 0.392767    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  -7.1456     7.6307  -0.936 0.349099    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  -1.3972     8.0700  -0.173 0.862554    
## bs(BMXBMI, df = 7)6:RIAGENDRMale   5.4456     8.5969   0.633 0.526477    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -16.4055    10.3130  -1.591 0.111727    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.176 on 4974 degrees of freedom
## Multiple R-squared:  0.891,  Adjusted R-squared:  0.8904 
## F-statistic:  1626 on 25 and 4974 DF,  p-value: < 2.2e-16

# Linear BMXWT in place of BMXHT; printed adjusted R-squared is 0.8917
# (versus 0.8904 with BMXHT).
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.2407  -3.2202   0.0431   3.2603  28.5771 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       42.827914   3.995947  10.718  < 2e-16 ***
## bs(RIDAGEYR, df = 7)1              0.755474   0.835178   0.905  0.36574    
## bs(RIDAGEYR, df = 7)2              0.588719   0.574577   1.025  0.30560    
## bs(RIDAGEYR, df = 7)3              2.025429   0.651431   3.109  0.00189 ** 
## bs(RIDAGEYR, df = 7)4              3.504859   0.579334   6.050 1.56e-09 ***
## bs(RIDAGEYR, df = 7)5              7.559613   0.728533  10.376  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6              7.792854   0.794413   9.810  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             10.327018   0.927916  11.129  < 2e-16 ***
## bs(BMXBMI, df = 7)1                7.771601   5.328521   1.458  0.14477    
## bs(BMXBMI, df = 7)2               14.121786   3.722130   3.794  0.00015 ***
## bs(BMXBMI, df = 7)3               21.612942   4.084717   5.291 1.27e-07 ***
## bs(BMXBMI, df = 7)4               26.747375   3.969215   6.739 1.78e-11 ***
## bs(BMXBMI, df = 7)5               39.967693   4.266501   9.368  < 2e-16 ***
## bs(BMXBMI, df = 7)6               49.332357   4.655212  10.597  < 2e-16 ***
## bs(BMXBMI, df = 7)7               53.818924   5.213349  10.323  < 2e-16 ***
## RIAGENDRMale                       9.428689   7.634898   1.235  0.21691    
## BMXWT                              0.345833   0.009891  34.965  < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.337911  10.348010  -1.192  0.23320    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -10.659676   7.038343  -1.515  0.12996    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  -8.026813   7.818092  -1.027  0.30461    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  -9.312059   7.584820  -1.228  0.21961    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  -4.362121   8.020482  -0.544  0.58655    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  -5.172570   8.555814  -0.605  0.54549    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -12.865402  10.251928  -1.255  0.20956    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.146 on 4976 degrees of freedom
## Multiple R-squared:  0.8922, Adjusted R-squared:  0.8917 
## F-statistic:  1790 on 23 and 4976 DF,  p-value: < 2.2e-16

# Both BMXWT and BMXHT entered as default B-splines alongside the BMXBMI spline.
# NOTE(review): in the output below the standard errors of the BMXBMI spline
# terms grow from roughly 4-5 to 8-48 and no BMXBMI/BMXWT/BMXHT main-effect
# term is individually significant, while adjusted R-squared stays at 0.8917 --
# this looks like BMXBMI, BMXWT and BMXHT carrying overlapping information.
# NOTE(review): the run also emits "some 'x' values beyond boundary knots" for
# bs(BMXWT); presumably run_model evaluates the basis on rows outside the
# range it was fitted on -- confirm in run_model. Passing explicit
# Boundary.knots to bs() would avoid the ill-conditioned extrapolation.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + bs(BMXWT) + bs(BMXHT)")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.2422  -3.2052   0.0235   3.2620  28.9613 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       55.3741    10.5729   5.237 1.70e-07 ***
## bs(RIDAGEYR, df = 7)1              0.7798     0.8353   0.934  0.35058    
## bs(RIDAGEYR, df = 7)2              0.6117     0.5754   1.063  0.28777    
## bs(RIDAGEYR, df = 7)3              2.0287     0.6518   3.112  0.00187 ** 
## bs(RIDAGEYR, df = 7)4              3.5268     0.5800   6.081 1.28e-09 ***
## bs(RIDAGEYR, df = 7)5              7.5847     0.7291  10.403  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6              7.7977     0.7947   9.812  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             10.3700     0.9311  11.137  < 2e-16 ***
## bs(BMXBMI, df = 7)1                6.0488     8.1714   0.740  0.45919    
## bs(BMXBMI, df = 7)2                9.9793    13.5390   0.737  0.46111    
## bs(BMXBMI, df = 7)3               15.1616    20.2024   0.750  0.45300    
## bs(BMXBMI, df = 7)4               18.9148    23.7638   0.796  0.42610    
## bs(BMXBMI, df = 7)5               27.4902    36.1524   0.760  0.44705    
## bs(BMXBMI, df = 7)6               34.0036    41.2106   0.825  0.40934    
## bs(BMXBMI, df = 7)7               37.8790    47.9520   0.790  0.42960    
## RIAGENDRMale                       9.9193     7.7092   1.287  0.19826    
## bs(BMXWT)1                        29.6369    37.2567   0.795  0.42637    
## bs(BMXWT)2                        52.4851    40.0618   1.310  0.19022    
## bs(BMXWT)3                        71.9854    53.6215   1.342  0.17950    
## bs(BMXHT)1                         1.2665    10.4929   0.121  0.90393    
## bs(BMXHT)2                        -5.9958    18.4955  -0.324  0.74582    
## bs(BMXHT)3                        -6.8691    25.4183  -0.270  0.78698    
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.5355    10.3758  -1.208  0.22705    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -10.9102     7.1122  -1.534  0.12509    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  -8.5410     7.8976  -1.081  0.27954    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  -9.8170     7.6744  -1.279  0.20089    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  -5.1220     8.0992  -0.632  0.52714    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  -4.4335     8.8738  -0.500  0.61737    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -14.0453    10.4057  -1.350  0.17715    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.145 on 4971 degrees of freedom
## Multiple R-squared:  0.8923, Adjusted R-squared:  0.8917 
## F-statistic:  1471 on 28 and 4971 DF,  p-value: < 2.2e-16
## Warning in bs(BMXWT, degree = 3L, knots = numeric(0), Boundary.knots = c(33.2, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Linear BMXWT and BMXHT plus `years`. None of the `years` level coefficients
# is significant in the output below and adjusted R-squared stays at 0.8917.
# NOTE(review): the output lists a level `2022-2012` and no `2011-2012` row --
# this looks like a typo in the `years` column of the source table; confirm
# upstream.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT + years")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.1584  -3.2198   0.0602   3.2325  28.7521 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       42.798939   6.932160   6.174 7.19e-10 ***
## bs(RIDAGEYR, df = 7)1              0.788208   0.835502   0.943 0.345525    
## bs(RIDAGEYR, df = 7)2              0.580327   0.575441   1.008 0.313268    
## bs(RIDAGEYR, df = 7)3              2.049186   0.652095   3.142 0.001685 ** 
## bs(RIDAGEYR, df = 7)4              3.520726   0.581640   6.053 1.53e-09 ***
## bs(RIDAGEYR, df = 7)5              7.581963   0.730590  10.378  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6              7.781874   0.799491   9.734  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             10.363690   0.936983  11.061  < 2e-16 ***
## bs(BMXBMI, df = 7)1                7.370197   5.341725   1.380 0.167728    
## bs(BMXBMI, df = 7)2               13.812297   3.807579   3.628 0.000289 ***
## bs(BMXBMI, df = 7)3               21.394652   4.341517   4.928 8.58e-07 ***
## bs(BMXBMI, df = 7)4               26.478233   4.362825   6.069 1.38e-09 ***
## bs(BMXBMI, df = 7)5               39.821874   5.381110   7.400 1.59e-13 ***
## bs(BMXBMI, df = 7)6               49.088536   6.418745   7.648 2.44e-14 ***
## bs(BMXBMI, df = 7)7               53.680178   7.901873   6.793 1.22e-11 ***
## RIAGENDRMale                       8.763768   7.654206   1.145 0.252280    
## BMXWT                              0.344164   0.044793   7.683 1.85e-14 ***
## BMXHT                              0.003086   0.044785   0.069 0.945061    
## years2005-2006                    -0.167660   0.314911  -0.532 0.594469    
## years2007-2008                    -0.182922   0.294811  -0.620 0.534975    
## years2009-2010                    -0.298889   0.293273  -1.019 0.308181    
## years2013-2014                     0.085926   0.305622   0.281 0.778606    
## years2015-2016                     0.302430   0.299460   1.010 0.312585    
## years2017-2018                    -0.225789   0.311232  -0.725 0.468198    
## years2022-2012                    -0.376678   0.309125  -1.219 0.223081    
## bs(BMXBMI, df = 7)1:RIAGENDRMale -11.528412  10.365887  -1.112 0.266128    
## bs(BMXBMI, df = 7)2:RIAGENDRMale  -9.978568   7.052975  -1.415 0.157189    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  -7.428316   7.834622  -0.948 0.343104    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  -8.626758   7.604263  -1.134 0.256655    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  -3.837996   8.043396  -0.477 0.633268    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  -4.263056   8.668944  -0.492 0.622909    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -12.218217  10.265995  -1.190 0.234039    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.145 on 4968 degrees of freedom
## Multiple R-squared:  0.8924, Adjusted R-squared:  0.8917 
## F-statistic:  1329 on 31 and 4968 DF,  p-value: < 2.2e-16

# base model:
# Age spline + BMXBMI spline by RIAGENDR + linear BMXWT and BMXHT. This is the
# same right-hand side that `base_form` below extends with one extra variable.
# In the printed fit BMXHT is essentially zero (8.164e-04, p = 0.985) and
# adjusted R-squared (0.8917) equals that of the BMXWT-only variant above.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT")
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.2366  -3.2207   0.0431   3.2608  28.5724 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       4.272e+01  6.919e+00   6.175 7.16e-10 ***
## bs(RIDAGEYR, df = 7)1             7.556e-01  8.353e-01   0.905 0.365712    
## bs(RIDAGEYR, df = 7)2             5.890e-01  5.748e-01   1.025 0.305583    
## bs(RIDAGEYR, df = 7)3             2.026e+00  6.518e-01   3.108 0.001893 ** 
## bs(RIDAGEYR, df = 7)4             3.505e+00  5.799e-01   6.045 1.61e-09 ***
## bs(RIDAGEYR, df = 7)5             7.560e+00  7.291e-01  10.369  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6             7.793e+00  7.948e-01   9.805  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             1.033e+01  9.309e-01  11.095  < 2e-16 ***
## bs(BMXBMI, df = 7)1               7.777e+00  5.338e+00   1.457 0.145187    
## bs(BMXBMI, df = 7)2               1.414e+01  3.805e+00   3.715 0.000206 ***
## bs(BMXBMI, df = 7)3               2.164e+01  4.340e+00   4.987 6.35e-07 ***
## bs(BMXBMI, df = 7)4               2.678e+01  4.360e+00   6.142 8.80e-10 ***
## bs(BMXBMI, df = 7)5               4.003e+01  5.379e+00   7.441 1.17e-13 ***
## bs(BMXBMI, df = 7)6               4.941e+01  6.416e+00   7.702 1.61e-14 ***
## bs(BMXBMI, df = 7)7               5.393e+01  7.899e+00   6.827 9.72e-12 ***
## RIAGENDRMale                      9.424e+00  7.640e+00   1.233 0.217465    
## BMXWT                             3.450e-01  4.477e-02   7.707 1.54e-14 ***
## BMXHT                             8.164e-04  4.474e-02   0.018 0.985441    
## bs(BMXBMI, df = 7)1:RIAGENDRMale -1.234e+01  1.035e+01  -1.192 0.233253    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.066e+01  7.040e+00  -1.514 0.130127    
## bs(BMXBMI, df = 7)3:RIAGENDRMale -8.024e+00  7.821e+00  -1.026 0.304956    
## bs(BMXBMI, df = 7)4:RIAGENDRMale -9.307e+00  7.590e+00  -1.226 0.220190    
## bs(BMXBMI, df = 7)5:RIAGENDRMale -4.355e+00  8.031e+00  -0.542 0.587641    
## bs(BMXBMI, df = 7)6:RIAGENDRMale -5.149e+00  8.653e+00  -0.595 0.551848    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -1.287e+01  1.026e+01  -1.255 0.209654    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.146 on 4975 degrees of freedom
## Multiple R-squared:  0.8922, Adjusted R-squared:  0.8917 
## F-statistic:  1716 on 24 and 4975 DF,  p-value: < 2.2e-16

# grid.arrange(g1, g2,g3, nrow=3)

Regression models with additional variables

# Shared formula prefix for the models in this section. It deliberately ends
# with " + " so that exactly one extra term can be appended with paste0();
# on its own it is not a valid formula.
base_form <- "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT + "

# MFA 16:1 (Hexadecenoic) (gm)
# Base model plus raw DR1TM161 as a linear term; its coefficient is not
# significant in the output below (p = 0.403).
run_model(paste0(base_form,"DR1TM161"))
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.1862  -3.2077   0.0296   3.2544  28.5658 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       42.675648   6.919932   6.167 7.51e-10 ***
## bs(RIDAGEYR, df = 7)1              0.760276   0.835350   0.910 0.362799    
## bs(RIDAGEYR, df = 7)2              0.593572   0.574871   1.033 0.301874    
## bs(RIDAGEYR, df = 7)3              2.022479   0.651802   3.103 0.001927 ** 
## bs(RIDAGEYR, df = 7)4              3.503722   0.579908   6.042 1.63e-09 ***
## bs(RIDAGEYR, df = 7)5              7.534474   0.729768  10.324  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6              7.766219   0.795478   9.763  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             10.295923   0.931764  11.050  < 2e-16 ***
## bs(BMXBMI, df = 7)1                7.772783   5.338092   1.456 0.145429    
## bs(BMXBMI, df = 7)2               14.132997   3.805517   3.714 0.000206 ***
## bs(BMXBMI, df = 7)3               21.641396   4.339704   4.987 6.35e-07 ***
## bs(BMXBMI, df = 7)4               26.787103   4.360559   6.143 8.73e-10 ***
## bs(BMXBMI, df = 7)5               40.044374   5.379198   7.444 1.14e-13 ***
## bs(BMXBMI, df = 7)6               49.445696   6.416055   7.707 1.55e-14 ***
## bs(BMXBMI, df = 7)7               53.940241   7.899506   6.828 9.62e-12 ***
## RIAGENDRMale                       9.525850   7.641438   1.247 0.212601    
## BMXWT                              0.344828   0.044768   7.702 1.60e-14 ***
## BMXHT                              0.001652   0.044748   0.037 0.970555    
## DR1TM161                          -0.070643   0.084413  -0.837 0.402703    
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.414530  10.349769  -1.199 0.230391    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -10.731526   7.040788  -1.524 0.127524    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  -8.101492   7.821453  -1.036 0.300344    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  -9.384214   7.591157  -1.236 0.216441    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  -4.434286   8.031539  -0.552 0.580898    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  -5.224766   8.654076  -0.604 0.546048    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -12.972825  10.260101  -1.264 0.206147    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.146 on 4974 degrees of freedom
## Multiple R-squared:  0.8922, Adjusted R-squared:  0.8917 
## F-statistic:  1647 on 25 and 4974 DF,  p-value: < 2.2e-16

# Distribution of raw DR1TM161 over the full NA-omitted `data` table (not only
# the training split).
hist(data$DR1TM161)

# Same variable after invNorm(). The transform maps ranks to normal quantiles,
# so this histogram is expected to look close to normal whatever the raw shape
# (heavy ties aside).
hist(invNorm(data$DR1TM161))

# Base model plus the rank-based inverse-normal transform of DR1TM161.
# invNorm() uses rank(x) and length(x), so inside the formula it is computed
# from whichever rows are handed to the model, not from the full `data` table
# plotted above.
# NOTE(review): if run_model also predicts on held-out rows, the transform is
# recomputed from those rows' own ranks rather than reusing the training
# mapping -- confirm against run_model.
run_model(paste0(base_form,"invNorm(DR1TM161)"))
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.2321  -3.2255   0.0421   3.2613  28.5730 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       4.271e+01  6.924e+00   6.169 7.44e-10 ***
## bs(RIDAGEYR, df = 7)1             7.566e-01  8.355e-01   0.906 0.365227    
## bs(RIDAGEYR, df = 7)2             5.891e-01  5.749e-01   1.025 0.305537    
## bs(RIDAGEYR, df = 7)3             2.026e+00  6.518e-01   3.108 0.001895 ** 
## bs(RIDAGEYR, df = 7)4             3.505e+00  5.800e-01   6.043 1.62e-09 ***
## bs(RIDAGEYR, df = 7)5             7.559e+00  7.296e-01  10.360  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6             7.791e+00  7.955e-01   9.794  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             1.033e+01  9.317e-01  11.083  < 2e-16 ***
## bs(BMXBMI, df = 7)1               7.780e+00  5.339e+00   1.457 0.145082    
## bs(BMXBMI, df = 7)2               1.414e+01  3.806e+00   3.715 0.000206 ***
## bs(BMXBMI, df = 7)3               2.164e+01  4.340e+00   4.986 6.36e-07 ***
## bs(BMXBMI, df = 7)4               2.678e+01  4.361e+00   6.141 8.82e-10 ***
## bs(BMXBMI, df = 7)5               4.003e+01  5.380e+00   7.441 1.17e-13 ***
## bs(BMXBMI, df = 7)6               4.942e+01  6.417e+00   7.701 1.62e-14 ***
## bs(BMXBMI, df = 7)7               5.393e+01  7.900e+00   6.826 9.75e-12 ***
## RIAGENDRMale                      9.434e+00  7.643e+00   1.234 0.217105    
## BMXWT                             3.450e-01  4.477e-02   7.706 1.55e-14 ***
## BMXHT                             8.895e-04  4.476e-02   0.020 0.984144    
## invNorm(DR1TM161)                -4.888e-03  7.734e-02  -0.063 0.949614    
## bs(BMXBMI, df = 7)1:RIAGENDRMale -1.235e+01  1.035e+01  -1.193 0.232943    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.067e+01  7.042e+00  -1.515 0.129940    
## bs(BMXBMI, df = 7)3:RIAGENDRMale -8.033e+00  7.823e+00  -1.027 0.304544    
## bs(BMXBMI, df = 7)4:RIAGENDRMale -9.316e+00  7.592e+00  -1.227 0.219878    
## bs(BMXBMI, df = 7)5:RIAGENDRMale -4.364e+00  8.033e+00  -0.543 0.586939    
## bs(BMXBMI, df = 7)6:RIAGENDRMale -5.158e+00  8.655e+00  -0.596 0.551250    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -1.288e+01  1.026e+01  -1.255 0.209407    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.147 on 4974 degrees of freedom
## Multiple R-squared:  0.8922, Adjusted R-squared:  0.8917 
## F-statistic:  1647 on 25 and 4974 DF,  p-value: < 2.2e-16

# Dietary day one sample weight
# NOTE(review): WTDRD1 is entered here as an ordinary covariate. A sample
# weight would more usually be supplied through `weights =` or a survey
# design rather than as a predictor -- confirm this is intended. Its
# coefficient is not significant in the output below (p = 0.265).
run_model(paste0(base_form,"WTDRD1"))
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.3057  -3.1911   0.0446   3.2660  28.5400 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       4.240e+01  6.925e+00   6.123 9.88e-10 ***
## bs(RIDAGEYR, df = 7)1             7.668e-01  8.353e-01   0.918 0.358683    
## bs(RIDAGEYR, df = 7)2             5.884e-01  5.748e-01   1.024 0.306089    
## bs(RIDAGEYR, df = 7)3             2.027e+00  6.518e-01   3.111 0.001876 ** 
## bs(RIDAGEYR, df = 7)4             3.522e+00  5.801e-01   6.072 1.36e-09 ***
## bs(RIDAGEYR, df = 7)5             7.530e+00  7.296e-01  10.321  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6             7.793e+00  7.948e-01   9.806  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             1.030e+01  9.314e-01  11.054  < 2e-16 ***
## bs(BMXBMI, df = 7)1               7.828e+00  5.338e+00   1.466 0.142603    
## bs(BMXBMI, df = 7)2               1.420e+01  3.806e+00   3.732 0.000192 ***
## bs(BMXBMI, df = 7)3               2.170e+01  4.340e+00   5.001 5.90e-07 ***
## bs(BMXBMI, df = 7)4               2.684e+01  4.361e+00   6.156 8.04e-10 ***
## bs(BMXBMI, df = 7)5               4.012e+01  5.380e+00   7.459 1.03e-13 ***
## bs(BMXBMI, df = 7)6               4.955e+01  6.417e+00   7.721 1.38e-14 ***
## bs(BMXBMI, df = 7)7               5.402e+01  7.899e+00   6.838 8.99e-12 ***
## RIAGENDRMale                      9.449e+00  7.640e+00   1.237 0.216222    
## BMXWT                             3.442e-01  4.477e-02   7.688 1.79e-14 ***
## BMXHT                             3.263e-03  4.479e-02   0.073 0.941932    
## WTDRD1                           -1.753e-06  1.572e-06  -1.115 0.264801    
## bs(BMXBMI, df = 7)1:RIAGENDRMale -1.243e+01  1.035e+01  -1.201 0.229720    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.072e+01  7.040e+00  -1.523 0.127890    
## bs(BMXBMI, df = 7)3:RIAGENDRMale -8.066e+00  7.821e+00  -1.031 0.302415    
## bs(BMXBMI, df = 7)4:RIAGENDRMale -9.355e+00  7.590e+00  -1.232 0.217847    
## bs(BMXBMI, df = 7)5:RIAGENDRMale -4.329e+00  8.031e+00  -0.539 0.589897    
## bs(BMXBMI, df = 7)6:RIAGENDRMale -5.372e+00  8.655e+00  -0.621 0.534860    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -1.261e+01  1.026e+01  -1.229 0.219120    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.146 on 4974 degrees of freedom
## Multiple R-squared:  0.8922, Adjusted R-squared:  0.8917 
## F-statistic:  1647 on 25 and 4974 DF,  p-value: < 2.2e-16

# BMXARML - Upper Arm Length (cm)
# Base model plus BMXARML as a linear term; not significant in the output
# below (p = 0.378).
run_model(paste0(base_form,"BMXARML"))
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.3497  -3.2137   0.0284   3.2656  28.6088 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       42.66512    6.91996   6.166 7.58e-10 ***
## bs(RIDAGEYR, df = 7)1              0.74740    0.83538   0.895 0.371001    
## bs(RIDAGEYR, df = 7)2              0.59229    0.57485   1.030 0.302906    
## bs(RIDAGEYR, df = 7)3              2.02701    0.65179   3.110 0.001882 ** 
## bs(RIDAGEYR, df = 7)4              3.51399    0.57998   6.059 1.47e-09 ***
## bs(RIDAGEYR, df = 7)5              7.58918    0.72986  10.398  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6              7.84555    0.79702   9.844  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7             10.38092    0.93285  11.128  < 2e-16 ***
## bs(BMXBMI, df = 7)1                7.73798    5.33823   1.450 0.147250    
## bs(BMXBMI, df = 7)2               14.13971    3.80549   3.716 0.000205 ***
## bs(BMXBMI, df = 7)3               21.67687    4.33987   4.995 6.09e-07 ***
## bs(BMXBMI, df = 7)4               26.83560    4.36097   6.154 8.17e-10 ***
## bs(BMXBMI, df = 7)5               40.14602    5.38080   7.461 1.01e-13 ***
## bs(BMXBMI, df = 7)6               49.49629    6.41658   7.714 1.47e-14 ***
## bs(BMXBMI, df = 7)7               54.17270    7.90433   6.854 8.08e-12 ***
## RIAGENDRMale                       9.33984    7.64100   1.222 0.221640    
## BMXWT                              0.34521    0.04477   7.711 1.50e-14 ***
## BMXHT                              0.01029    0.04601   0.224 0.823111    
## BMXARML                           -0.04286    0.04859  -0.882 0.377731    
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.25223   10.34974  -1.184 0.236541    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -10.55376    7.04116  -1.499 0.133972    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  -7.93088    7.82155  -1.014 0.310643    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  -9.21597    7.59124  -1.214 0.224795    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  -4.27931    8.03138  -0.533 0.594179    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  -5.03157    8.65456  -0.581 0.561012    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -12.88972   10.25933  -1.256 0.209034    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.146 on 4974 degrees of freedom
## Multiple R-squared:  0.8922, Adjusted R-squared:  0.8917 
## F-statistic:  1647 on 25 and 4974 DF,  p-value: < 2.2e-16

# BMXLEG - Upper Leg Length (cm)
# Build the model formula string by appending the BMXLEG term to base_form,
# then pass that string to run_model(). Both base_form and run_model() are
# defined outside this excerpt; the summary printed below reports an lm()
# fit whose coefficient table includes a BMXLEG row.
run_model(paste0(base_form,"BMXLEG"))
## 
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.7138  -3.2677   0.0116   3.2808  28.9013 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       39.21014    6.77594   5.787 7.62e-09 ***
## bs(RIDAGEYR, df = 7)1              0.72294    0.81748   0.884 0.376549    
## bs(RIDAGEYR, df = 7)2              0.53882    0.56257   0.958 0.338219    
## bs(RIDAGEYR, df = 7)3              1.51739    0.63878   2.375 0.017565 *  
## bs(RIDAGEYR, df = 7)4              2.89230    0.56901   5.083 3.85e-07 ***
## bs(RIDAGEYR, df = 7)5              6.65949    0.71612   9.299  < 2e-16 ***
## bs(RIDAGEYR, df = 7)6              6.75420    0.78098   8.648  < 2e-16 ***
## bs(RIDAGEYR, df = 7)7              9.78833    0.91179  10.735  < 2e-16 ***
## bs(BMXBMI, df = 7)1                7.03414    5.22425   1.346 0.178221    
## bs(BMXBMI, df = 7)2               13.58603    3.72437   3.648 0.000267 ***
## bs(BMXBMI, df = 7)3               21.51386    4.24697   5.066 4.22e-07 ***
## bs(BMXBMI, df = 7)4               26.46936    4.26741   6.203 6.00e-10 ***
## bs(BMXBMI, df = 7)5               39.90578    5.26421   7.581 4.08e-14 ***
## bs(BMXBMI, df = 7)6               48.62258    6.27904   7.744 1.16e-14 ***
## bs(BMXBMI, df = 7)7               55.14802    7.73111   7.133 1.12e-12 ***
## RIAGENDRMale                       9.33699    7.47718   1.249 0.211822    
## BMXWT                              0.33556    0.04382   7.659 2.25e-14 ***
## BMXHT                              0.13076    0.04465   2.929 0.003418 ** 
## BMXLEG                            -0.43111    0.02904 -14.844  < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale -12.02113   10.12821  -1.187 0.235325    
## bs(BMXBMI, df = 7)2:RIAGENDRMale -10.61473    6.88977  -1.541 0.123466    
## bs(BMXBMI, df = 7)3:RIAGENDRMale  -7.89461    7.65376  -1.031 0.302371    
## bs(BMXBMI, df = 7)4:RIAGENDRMale  -9.14422    7.42838  -1.231 0.218387    
## bs(BMXBMI, df = 7)5:RIAGENDRMale  -4.17364    7.85936  -0.531 0.595414    
## bs(BMXBMI, df = 7)6:RIAGENDRMale  -3.39482    8.46949  -0.401 0.688563    
## bs(BMXBMI, df = 7)7:RIAGENDRMale -15.56371   10.04177  -1.550 0.121230    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.036 on 4974 degrees of freedom
## Multiple R-squared:  0.8968, Adjusted R-squared:  0.8962 
## F-statistic:  1728 on 25 and 4974 DF,  p-value: < 2.2e-16

# grid.arrange(g1, g2,g3, nrow=3)